if (require("PageRank")) {
library(PageRank)
}else{
devtools::install_github("ryangreenup/PageRank")
library(PageRank)
}
library(pacman)
pacman::p_load(PageRank, devtools, Matrix, igraph, mise, tidyverse, rgl, latex2exp)
# mise()
Define some constants
n <- 20
p <- 1:n/n
beta <- 1:n/n
beta <- runif(n)*100
#sz <- 1:n/n+10
sz <- (1:n/n)*100+10
input_var <- expand.grid("n" = n, "p" = p, "beta" = beta, "size" = sz)
input_var
random_graph <- function(n, p, beta, size) {
g1 <- igraph::erdos.renyi.game(n = sz, p)
A <- igraph::get.adjacency(g1) # Row to column
A <- Matrix::t(A)
A_dens <- mean(A)
T <- PageRank::power_walk_prob_trans(A)
tr <- sum(diag(T))
e2 <- eigen(T, only.values = TRUE)$values[2] # R orders by descending magnitude
A_det <- det(T)
return(c(abs(e2), A_dens, A_det, tr))
}
Map the function
chival <- dchisq(seq(from = 0, to = 40, length.out = 100), df = 10)*7
index <- seq(from = 0, to = 2.2, length.out = 100)
chidata <- data.frame(index = index, chi = chival)
ggplot(data) +
geom_point(mapping = aes(x = density, y = eigenvalue2, size = beta, color = size, shape = factor(n))) +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "Density of Adjacency Matrix", y = TeX("$\\xi_2$ of $T_{PW}$"))
mod <- lm(eigenvalue2 ~ poly(density, 1), data = data)
mod$residuals %>% hist(breaks = 90)
hist(rnorm(3000), breaks = 100)
plot(mod)
NA
This is interesting, lets look at a snapshot of it:
library(scatterplot3d)
shapes = c(16, 17, 18)
shapes <- shapes[as.numeric(iris$Species)]
scatterplot3d(iris[,1:3], pch = shapes)
d <- data[,names(data) %in% c("eigenvalue2", "density", "determinant")]
scatterplot3d(d, angle = 10)
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: ‘plotly’
The following object is masked from ‘package:latex2exp’:
TeX
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:igraph’:
groups
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
mtcars$am[which(mtcars$am == 0)] <- 'Automatic'
mtcars$am[which(mtcars$am == 1)] <- 'Manual'
mtcars$am <- as.factor(mtcars$am)
d <- data[sample(1:nrow(data), 1000),]
fig <- plot_ly(d, x = ~determinant, y = ~density, z = ~eigenvalue2)
fig <- fig %>% add_markers(size = 1)
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Weight'),
yaxis = list(title = 'Gross horsepower'),
zaxis = list(title = '1/4 mile time')))
fig
`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
OK so nothing great, well the spread is heteroskedastic so let’s log transform it:
ggplot(data, aes(x = density, y = log(eigenvalue2))) +
geom_point(mapping = aes(size = size, color = p, shape = factor(n))) +
# stat_smooth() +
scale_size_continuous(range = c(0.1,1.5)) +
labs(x = "Density of Adjacency Matrix", y = "Second Eigenvalue of Power Walk Transition Probability Matrix")
We could probably model this with a quadratic:
mod_x1 <- lm(log(eigenvalue2) ~ poly(density, 1), data = data)
data$x1 <- predict(mod_x1)
mod_x2 <- lm(log(eigenvalue2) ~ poly(density, 2), data = data)
data$x2 <- predict(mod_x2)
mod_x3 <- lm(log(eigenvalue2) ~ poly(density, 3), data = data)
data$x3 <- predict(mod_x3)
mod_x4 <- lm(log(eigenvalue2) ~ poly(density, 4), data = data)
data$x4 <- predict(mod_x4)
mod_x5 <- lm(log(eigenvalue2) ~ poly(density, 5), data = data)
data$x5 <- predict(mod_x5)
mod_x5 <- lm(log(eigenvalue2) ~ poly(density, 5), data = data)
data$x5 <- predict(mod_x5)
mod_x6 <- lm(log(eigenvalue2) ~ poly(density, 6), data = data)
data$x6 <- predict(mod_x6)
mod_xl <- lm(log(eigenvalue2) ~ log(1-density), data = data)
data$xl <- predict(mod_xl)
max(mod_xl$residuals)*2
[1] 0.8245406
# TODO Change the colour of each model by using pivot_longer
# TODO Make a plot of degree vs RSS, comment that the lack of elbo is evidence to regect
# TODO Try a negative log, any luck? with that
# TODO Does this vary by beta?
# TODO Write it up in a report
# DONE What about a sqrt transform and then a linear model?
# Leaves too much variance
ggplot(data, aes(x = density, y = log(eigenvalue2))) +
geom_point(mapping = aes(size = size, alpha = 0.01, color = size, shape = factor(n))) +
# stat_smooth() +
scale_size_continuous(range = c(0.1,1.5)) +
labs(x = "Density of Adjacency Matrix [ mean(A) ]", y = TeX("$\\log\\left( \\xi_2 \\right)$ of T")) +
geom_line(aes(x = density, y = x1, lwd = 2)) +
geom_line(aes(x = density, y = xl, lwd = 3))
# geom_line(aes(x = density, y = x3, lwd = 0.5)) +
# geom_line(aes(x = density, y = x4, lwd = 0.5)) +
# geom_line(aes(x = density, y = x5, lwd = 0.5)) +
# geom_line(aes(x = density, y = x6, lwd = 36))
ggplot(data, aes(x = trace_w , y = log(eigenvalue2))) +
geom_point(mapping = aes(size = size, color = p, shape = factor(n))) +
# stat_smooth() +
scale_size_continuous(range = c(0.1,1.5)) +
labs(x = "Trace of Transition Matrix", y = TeX("$\\log\\left( \\xi_2 \\right)$ of \\mathbf{W}"))
mod_df <- data
mod_hyp <- lm(log(eigenvalue2) ~ I(trace_w^(-1)), data = data)
mod_df$hyp <- predict(mod_hyp)
mod_log <- lm(log(eigenvalue2) ~ log(trace_w), data = data)
mod_df$log <- predict(mod_log)
ggplot(mod_df, aes(x = trace_w, y = log(eigenvalue2))) +
geom_point(mapping = aes(size = size, alpha = 0.01, color = size, shape = factor(n))) +
# stat_smooth() +
scale_size_continuous(range = c(0.1,1.5)) +
labs(x = "Trace of A", y = TeX("$\\log\\left( \\xi_2 \\right)$ of T")) +
geom_line(aes(x = trace_w, y = hyp, lwd = 2)) +
geom_line(aes(x = trace_w, y = log, lwd = 2))
mod_df_long <- pivot_longer(mod_df, cols = c(hyp, log), names_to = "Model_Type", values_to = "eigenvalue2_mod")
mod_df_long$eigenvalue2_log <- log(mod_df_long$eigenvalue2)
ggplot(mod_df_long, aes(x = trace_w)) +
geom_point(shape = 23, aes(y = eigenvalue2_log), fill = "lightblue", col = "black", size = 0.7, alpha = 0.4) +
geom_line(aes(y = eigenvalue2_mod, col = Model_Type), size = 1) +
labs(col = c("Model \nType")) +
scale_color_manual(labels = c("Hyperbolic", "Logarithmic"),
values = c("indianred", "royalblue")) +
theme_linedraw()
NOPE
chival <- dchisq(seq(from = 0, to = 40, length.out = 100), df = 10)*7
index <- seq(from = 0, to = 2.2, length.out = 100)
chidata <- data.frame(index = index, chi = chival)
ggplot(data) +
geom_point(mapping = aes(x = density, y = eigenvalue2, size = beta, color = size, shape = factor(n))) +
geom_line(data = chidata, mapping = aes(x = index, y = chi)) +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "Density of Adjacency Matrix", y = "Second Eigenvalue of Power Walk Transition Probability Matrix")
constants:
n <- 20
p <- 1:n/n
beta <- 1:n/n
beta <- runif(n)*100
sz <- ((1:n)/n)*100+10
input_var <- expand.grid("n" = n, "p" = p, "beta" = beta, "size" = sz)
functions:
random_graph <- function(n, p, beta, size) {
g1 <- igraph::erdos.renyi.game(n = sz, p)
A <- igraph::get.adjacency(g1) # Row to column
A <- Matrix::t(A)
A_dens <- mean(A)
T <- PageRank::power_walk_prob_trans(A)
e2 <- eigen(T, only.values = TRUE)$values[2] # R orders by descending magnitude
A_det <- det(A)
return(c(abs(abs(e2)-0.4), abs(A_det), A_dens))
}
Map the function
nc <- length(random_graph(1, 1, 1, 1))
Y <- matrix(ncol = nc, nrow = nrow(input_var))
for (i in 1:nrow(input_var)) {
X <- as.vector(input_var[i,])
Y[i,] <- random_graph(X$n, X$p, X$beta, X$size)
}
if (sum(abs(Y) != abs(Re(Y))) == 0) {
Y <- Re(Y)
}
nrow(input_var)
nrow(Y)
Y <- as.data.frame(Y); colnames(Y) <- c("eigenvalue2", "determinant")
data <- cbind(input_var, Y)
ggplot(data) +
geom_point(mapping = aes(x = determinant, y = eigenvalue2, size = size, color = beta, shape = factor(n))) +
scale_size_continuous(range = c(0.1,1)) +
labs(y = "||e2|-0.4|", x = TeX("$\\left\\lvert A \\right\\rvert $"))
g1 <- igraph::erdos.renyi.game(n = sz, p)
coords <- layout_with_fr(g1, dim = 3)
# plot(g1)
rglplot(g1, layout=coords, size = 0.1)
## Not run:
g <- make_lattice( c(5,5,5) )
coords <- layout_with_fr(g, dim=3)
rglplot(g, layout=coords)
## End(Not run)
n <- sz <- size <- 10^3
p <- 0.
g1 <- igraph::erdos.renyi.game(n = sz, p)
A <- igraph::get.adjacency(g1) # Row to column
A <- Matrix::t(A)
det(A)
ggplot(data) +
geom_point(mapping = aes(x = size, y = determinant, size = size, color = beta, shape = factor(n))) +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "size", y = "determinant")
constants:
n <- 10
p <- 1:n/n
beta <- 1:n/n
beta <- runif(n)*100
sz <- 1:n/n+100
input_var <- expand.grid("n" = n, "p" = p, "beta" = beta, "size" = sz)
functions:
random_graph <- function(n, p, beta, size) {
g1 <- igraph::erdos.renyi.game(n = sz, p)
A <- igraph::get.adjacency(g1) # Row to column
A <- Matrix::t(A)
A_dens <- mean(A)
T <- PageRank::power_walk_prob_trans(A)
e2 <- eigen(T, only.values = TRUE)$values[2] # R orders by descending magnitude
A_det <- det(A)
return(c(abs(e2), A_det, A_dens))
}
Map the function
nc <- length(random_graph(1, 1, 1, 1))
Y <- matrix(ncol = nc, nrow = nrow(input_var))
for (i in 1:nrow(input_var)) {
X <- as.vector(input_var[i,])
Y[i,] <- random_graph(X$n, X$p, X$beta, X$size)
}
if (sum(abs(Y) != abs(Re(Y))) == 0) {
Y <- Re(Y)
}
nrow(input_var)
nrow(Y)
Y <- as.data.frame(Y); colnames(Y) <- c("eigenvalue2", "determinant")
data <- cbind(input_var, Y)
chival <- dchisq(seq(from = 0, to = 40, length.out = 100), df = 10)*7
index <- seq(from = 0, to = 2.2, length.out = 100)
chidata <- data.frame(index = index, chi = chival)
ggplot(data) +
geom_point(mapping = aes(x = determinant, y = eigenvalue2, size = size, color = beta, shape = factor(n))) +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "Density of Adjacency Matrix", y = "Second Eigenvalue of Power Walk Transition Probability Matrix")
g1 <- igraph::erdos.renyi.game(n = sz, p)
coords <- layout_with_fr(g1, dim = 3)
# plot(g1)
# rglplot(g1, layout=coords, size = 0.1)
## Not run:
g <- make_lattice( c(5,5,5) )
coords <- layout_with_fr(g, dim=3)
rglplot(g, layout=coords)
## End(Not run)
n <- sz <- size <- 100
p <- 0.4
g1 <- igraph::erdos.renyi.game(n = sz, p)
A <- igraph::get.adjacency(g1) # Row to column
A <- Matrix::t(A)
det(A)